## === Full Dataset ===
## Cells: 205230
## Genes: 30803
## Assays: RNA, ADT
Stromal cellとして以下のカテゴリーを global1
列から抽出:
# Stromal cell types を定義
stromal_types <- c("Fibroblasts", "Endothelial", "Pericytes", "Lymphatic")
# global1 列に基づいてサブセット
stromal <- subset(full_obj, subset = global1 %in% stromal_types)
cat("=== Stromal Subset ===\n")## === Stromal Subset ===
## Cells: 26623
##
## Cell type composition:
##
## Endothelial Fibroblasts Lymphatic Pericytes
## 8197 15666 532 2228
## === Original Subtypes (ct_subtype) ===
##
## Venous CD34+ Fibroblasts POSTN+ Fibroblasts CXCL12+ Fibroblasts
## 6877 4130 3934 3285
## Pericytes CXCL14+ Fibroblasts LL Fibroblasts KDR+ Arterial
## 2194 1431 1417 871
## MMP+ Fibroblasts SOX5+ Fibroblasts Lymphatics FLI1 + Capillary
## 805 660 535 395
## Cycling Fibroblasts
## 89
# 全体UMAPの中でStromal cellsをハイライト
full_obj$is_stromal <- ifelse(
full_obj$global1 %in% stromal_types,
as.character(full_obj$global1),
"Other"
)
p1 <- DimPlot(full_obj, group.by = "is_stromal",
cols = c("Fibroblasts" = "#e74c3c",
"Endothelial" = "#3498db",
"Pericytes" = "#2ecc71",
"Lymphatic" = "#9b59b6",
"Other" = "grey90"),
pt.size = 0.1, order = stromal_types) +
ggtitle("Full UMAP: Stromal Cells Highlighted") +
theme(plot.title = element_text(face = "bold"))
p1# RNA assay で再正規化
DefaultAssay(stromal) <- "RNA"
stromal <- NormalizeData(stromal, verbose = FALSE)
stromal <- FindVariableFeatures(stromal,
selection.method = "vst",
nfeatures = 3000,
verbose = FALSE)
cat("Variable features:", length(VariableFeatures(stromal)), "\n")## Variable features: 3000
# Top variable features の表示
top20 <- head(VariableFeatures(stromal), 20)
p_vf <- VariableFeaturePlot(stromal)
p_vf <- LabelPoints(plot = p_vf, points = top20, repel = TRUE, xnudge = 0, ynudge = 0)
p_vf + ggtitle("Top Variable Features in Stromal Cells")stromal <- ScaleData(stromal, verbose = FALSE)
stromal <- RunPCA(stromal, npcs = 50, verbose = FALSE)ElbowPlot(stromal, ndims = 50) +
ggtitle("PCA Elbow Plot") +
geom_vline(xintercept = 30, linetype = "dashed", color = "red") +
annotate("text", x = 32, y = max(Stdev(stromal, reduction = "pca")) * 0.8,
label = "PC30", color = "red")p_pca1 <- DimPlot(stromal, reduction = "pca", group.by = "global1") +
ggtitle("PCA: by Cell Type")
p_pca2 <- DimPlot(stromal, reduction = "pca", group.by = "sample") +
ggtitle("PCA: by Sample (before Harmony)")
p_pca1 + p_pca2sample をバッチ変数としてHarmonyで統合:
# Harmony補正後の埋め込みを確認
p_h1 <- DimPlot(stromal, reduction = "harmony", group.by = "global1") +
ggtitle("Harmony: by Cell Type")
p_h2 <- DimPlot(stromal, reduction = "harmony", group.by = "sample") +
ggtitle("Harmony: by Sample (after correction)")
p_h1 + p_h2# Harmony補正済み埋め込みからUMAP・近傍グラフを構築
stromal <- RunUMAP(stromal, reduction = "harmony", dims = 1:n_dims, verbose = FALSE)
stromal <- FindNeighbors(stromal, reduction = "harmony", dims = 1:n_dims, verbose = FALSE)複数の解像度でクラスタリングを実行し、比較:
resolutions <- c(0.3, 0.5, 0.8, 1.0)
for (res in resolutions) {
stromal <- FindClusters(stromal, resolution = res, verbose = FALSE)
}
cat("Cluster counts by resolution:\n")## Cluster counts by resolution:
for (res in resolutions) {
col <- paste0("RNA_snn_res.", res)
cat(sprintf(" res=%.1f: %d clusters\n", res, length(unique(stromal@meta.data[[col]]))))
}## res=0.3: 11 clusters
## res=0.5: 13 clusters
## res=0.8: 19 clusters
## res=1.0: 20 clusters
plots <- list()
for (res in resolutions) {
col <- paste0("RNA_snn_res.", res)
Idents(stromal) <- stromal@meta.data[[col]]
plots[[as.character(res)]] <- DimPlot(stromal, label = TRUE, label.size = 4) +
ggtitle(paste0("Resolution = ", res)) +
NoLegend()
}
wrap_plots(plots, ncol = 2)# res=0.5をデフォルトとして使用(必要に応じて変更)
selected_res <- 0.5
Idents(stromal) <- stromal@meta.data[[paste0("RNA_snn_res.", selected_res)]]
stromal$seurat_clusters <- Idents(stromal)
cat("Selected resolution:", selected_res, "\n")## Selected resolution: 0.5
## Number of clusters: 13
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 6783 4200 3777 3373 1948 1507 1429 1004 772 641 531 370 288
# クラスターと元のglobal1の対応
p_ct1 <- DimPlot(stromal, group.by = "seurat_clusters", label = TRUE) +
ggtitle("Subclusters") + NoLegend()
p_ct2 <- DimPlot(stromal, group.by = "global1") +
ggtitle("Original Cell Type (global1)")
p_ct1 + p_ct2# クラスター vs 元アノテーション のクロス集計
ct_table <- table(stromal$seurat_clusters, stromal$global1)
ct_prop <- prop.table(ct_table, margin = 1) # 行ごとの割合
pheatmap(ct_prop,
color = colorRampPalette(c("white", "#3498db", "#e74c3c"))(50),
cluster_rows = FALSE,
cluster_cols = FALSE,
display_numbers = TRUE,
number_format = "%.2f",
main = "Cluster Composition (proportion by global1)")# クラスター vs ct_subtype
ct2_table <- table(stromal$seurat_clusters, stromal$ct_subtype)
ct2_prop <- prop.table(ct2_table, margin = 1)
# 0でない列のみ残す
ct2_prop <- ct2_prop[, colSums(ct2_prop) > 0]
pheatmap(ct2_prop,
color = colorRampPalette(c("white", "#2ecc71", "#8e44ad"))(50),
cluster_rows = FALSE,
cluster_cols = TRUE,
display_numbers = TRUE,
number_format = "%.2f",
fontsize = 8,
main = "Cluster Composition (proportion by ct_subtype)")# Stromal cell の既知マーカー遺伝子
marker_genes <- c(
# Fibroblasts (general)
"COL1A1", "COL1A2", "DCN", "LUM", "VIM", "PDGFRA",
# Fibroblast subtypes
"CD34", "POSTN", "CXCL12", "CXCL14", "MMP1", "MMP3", "SOX5", "MKI67",
# Endothelial
"PECAM1", "VWF", "CDH5", "KDR", "ACKR1", "FLT1",
# Pericytes / Mural cells
"RGS5", "PDGFRB", "ACTA2", "MCAM", "NOTCH3",
# Lymphatic
"PROX1", "LYVE1", "PDPN", "FLT4", "CCL21"
)
# Seuratオブジェクトに存在する遺伝子のみフィルタ
marker_genes <- marker_genes[marker_genes %in% rownames(stromal)]
DotPlot(stromal, features = marker_genes, group.by = "seurat_clusters") +
RotatedAxis() +
ggtitle("Known Marker Genes by Cluster") +
theme(axis.text.x = element_text(size = 9))# 全クラスターのマーカー遺伝子を検出
all_markers <- FindAllMarkers(stromal,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.5,
verbose = FALSE)
cat("Total significant markers found:", nrow(all_markers), "\n")## Total significant markers found: 11282
# Top 5 per cluster
top5 <- all_markers %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC)# マーカー遺伝子テーブル表示
top5 %>%
select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
arrange(cluster, desc(avg_log2FC))# Top 3 マーカーのヒートマップ
top3 <- all_markers %>%
group_by(cluster) %>%
slice_max(n = 3, order_by = avg_log2FC) %>%
pull(gene) %>%
unique()
DoHeatmap(stromal, features = top3, size = 3) +
ggtitle("Top 3 Markers per Cluster") +
theme(axis.text.y = element_text(size = 7))# ADT (表面タンパク質) データの確認
DefaultAssay(stromal) <- "ADT"
adt_features <- rownames(stromal[["ADT"]])
cat("Available ADT features:", length(adt_features), "\n")## Available ADT features: 137
## CD86.1, CD274.1, TNFRSF14.1, PVR.1, NECTIN2.1, CD47.1, CD48.1, CD40.1, CD40LG.1, CD52.1, CD3D.1, CD8A.1, NCAM1.1, CD19.1, CD33.1, ITGAX.1, HLA-A.1, PTPRC.1, IL3RA.1, CD7.1, ENG.1, ITGA6.1, CCR4.1, CD4.1, CD44.1, CD14.1, FCGR3A.1, IL2RA.1, PTPRC.2, PDCD1.1 ...
# Stromal関連のADTマーカーを探す
stromal_adt <- c("CD31", "CD34", "CD90", "CD140a", "CD140b", "CD146", "CD54", "CD106")
available_adt <- stromal_adt[stromal_adt %in% adt_features]
if (length(available_adt) > 0) {
# ADTの正規化
stromal <- NormalizeData(stromal, normalization.method = "CLR", margin = 2, verbose = FALSE)
cat("\nAvailable stromal ADT markers:", paste(available_adt, collapse = ", "), "\n")
p_adt_plots <- FeaturePlot(stromal, features = available_adt,
ncol = min(4, length(available_adt)),
reduction = "umap")
print(p_adt_plots)
} else {
cat("No stromal-specific ADT markers found. Showing available markers.\n")
# 利用可能なADTの一部を表示
show_adt <- head(adt_features, 8)
stromal <- NormalizeData(stromal, normalization.method = "CLR", margin = 2, verbose = FALSE)
FeaturePlot(stromal, features = show_adt, ncol = 4, reduction = "umap")
}## No stromal-specific ADT markers found. Showing available markers.
マーカー遺伝子発現パターンとクラスター特性に基づいてアノテーションを付与。 以下は テンプレート であり、実際のクラスタリング結果に基づいて修正が必要:
# === アノテーション辞書 ===
# 各クラスター番号に対して、マーカー遺伝子の発現パターンから
# 細胞タイプ名を割り当てる。
#
# 実行後のDotPlot・ヒートマップ・クロス集計表を参照して修正してください。
# --- ct_subtypeとの対応関係からアノテーションを推定 ---
# クラスターごとに最も多い ct_subtype を取得
cluster_annotation <- stromal@meta.data %>%
group_by(seurat_clusters) %>%
count(ct_subtype) %>%
slice_max(n = 1, order_by = n) %>%
select(seurat_clusters, ct_subtype)
cat("=== Auto-inferred annotation (majority vote from ct_subtype) ===\n")## === Auto-inferred annotation (majority vote from ct_subtype) ===
## seurat_clusters ct_subtype
## 1 0 Venous
## 2 1 POSTN+ Fibroblasts
## 3 2 CD34+ Fibroblasts
## 4 3 CXCL12+ Fibroblasts
## 5 4 Pericytes
## 6 5 LL Fibroblasts
## 7 6 CXCL14+ Fibroblasts
## 8 7 KDR+ Arterial
## 9 8 MMP+ Fibroblasts
## 10 9 SOX5+ Fibroblasts
## 11 10 Lymphatics
## 12 11 FLI1 + Capillary
## 13 12 Pericytes
# アノテーションを適用
annotation_map <- setNames(cluster_annotation$ct_subtype,
cluster_annotation$seurat_clusters)
stromal$cell_type <- unname(annotation_map[as.character(stromal$seurat_clusters)])# === 手動修正セクション ===
# 上記の自動アノテーションが不適切なクラスターがあれば、
# 以下のように個別に修正してください。
#
# 例:
# stromal$cell_type[stromal$seurat_clusters == "5"] <- "Inflammatory Fibroblasts"
# stromal$cell_type[stromal$seurat_clusters == "10"] <- "Tip Endothelial Cells"
#
# 修正後に再度プロットを確認:
cat("=== Final Annotation ===\n")## === Final Annotation ===
##
## CD34+ Fibroblasts CXCL12+ Fibroblasts CXCL14+ Fibroblasts FLI1 + Capillary
## 3777 3373 1429 370
## KDR+ Arterial LL Fibroblasts Lymphatics MMP+ Fibroblasts
## 1004 1507 531 772
## Pericytes POSTN+ Fibroblasts SOX5+ Fibroblasts Venous
## 2236 4200 641 6783
p_annot1 <- DimPlot(stromal, group.by = "seurat_clusters", label = TRUE) +
ggtitle("Subclusters") + NoLegend()
p_annot2 <- DimPlot(stromal, group.by = "cell_type", label = TRUE, label.size = 3, repel = TRUE) +
ggtitle("Annotated Cell Types") + NoLegend()
p_annot1 + p_annot2# アノテーションごとのマーカー発現
Idents(stromal) <- stromal$cell_type
DotPlot(stromal, features = marker_genes) +
RotatedAxis() +
ggtitle("Marker Expression by Annotated Cell Type")# サンプルごとの細胞タイプ組成
comp_table <- table(stromal$sample, stromal$cell_type)
comp_prop <- prop.table(comp_table, margin = 1)
comp_df <- as.data.frame(comp_prop)
names(comp_df) <- c("Sample", "CellType", "Proportion")
ggplot(comp_df, aes(x = Sample, y = Proportion, fill = CellType)) +
geom_bar(stat = "identity", position = "stack") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
ggtitle("Cell Type Composition by Sample") +
scale_fill_brewer(palette = "Set3")# 臨床群(ana)ごとの比較
if ("ana" %in% names(stromal@meta.data)) {
comp_ana <- table(stromal$ana, stromal$cell_type)
comp_ana_prop <- prop.table(comp_ana, margin = 1)
ana_df <- as.data.frame(comp_ana_prop)
names(ana_df) <- c("ANA_Status", "CellType", "Proportion")
ggplot(ana_df, aes(x = ANA_Status, y = Proportion, fill = CellType)) +
geom_bar(stat = "identity", position = "stack") +
theme_minimal() +
ggtitle("Cell Type Composition by ANA Status") +
scale_fill_brewer(palette = "Set3")
}# Stromal サブセットオブジェクトを保存
saveRDS(stromal, "JIAsyno_stromal_subclustered.rds")
cat("Saved: JIAsyno_stromal_subclustered.rds\n")## Saved: JIAsyno_stromal_subclustered.rds
# マーカー遺伝子リストを保存
write.csv(all_markers, "stromal_all_markers.csv", row.names = FALSE)
cat("Saved: stromal_all_markers.csv\n")## Saved: stromal_all_markers.csv
# アノテーション情報を保存
annotation_df <- stromal@meta.data %>%
select(seurat_clusters, cell_type, global1, ct_subtype) %>%
distinct(seurat_clusters, .keep_all = TRUE) %>%
arrange(as.numeric(as.character(seurat_clusters)))
write.csv(annotation_df, "stromal_annotation.csv", row.names = FALSE)
cat("Saved: stromal_annotation.csv\n")## Saved: stromal_annotation.csv
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 26.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Asia/Tokyo
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] harmony_1.2.0 Rcpp_1.0.11 pheatmap_1.0.12 RColorBrewer_1.1-3
## [5] patchwork_1.1.3 dplyr_1.1.4 ggplot2_3.4.4 Seurat_5.2.1
## [9] SeuratObject_5.0.2 sp_2.1-2
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.15.0 jsonlite_1.8.8 magrittr_2.0.3
## [4] spatstat.utils_3.1-2 farver_2.1.1 rmarkdown_2.25
## [7] vctrs_0.6.5 ROCR_1.0-11 spatstat.explore_3.3-4
## [10] htmltools_0.5.7 sass_0.4.8 sctransform_0.4.1
## [13] parallelly_1.36.0 KernSmooth_2.23-22 bslib_0.6.1
## [16] htmlwidgets_1.6.4 ica_1.0-3 plyr_1.8.9
## [19] plotly_4.10.3 zoo_1.8-12 cachem_1.0.8
## [22] igraph_1.6.0 mime_0.12 lifecycle_1.0.4
## [25] pkgconfig_2.0.3 Matrix_1.6-5 R6_2.5.1
## [28] fastmap_1.1.1 fitdistrplus_1.1-11 future_1.33.1
## [31] shiny_1.8.0 digest_0.6.33 colorspace_2.1-0
## [34] tensor_1.5 RSpectra_0.16-1 irlba_2.3.5.1
## [37] labeling_0.4.3 progressr_0.14.0 fansi_1.0.6
## [40] spatstat.sparse_3.1-0 httr_1.4.7 polyclip_1.10-6
## [43] abind_1.4-5 compiler_4.3.2 withr_2.5.2
## [46] fastDummies_1.7.3 highr_0.10 MASS_7.3-60
## [49] tools_4.3.2 lmtest_0.9-40 httpuv_1.6.13
## [52] future.apply_1.11.1 goftest_1.2-3 glue_1.6.2
## [55] nlme_3.1-163 promises_1.2.1 grid_4.3.2
## [58] Rtsne_0.17 cluster_2.1.4 reshape2_1.4.4
## [61] generics_0.1.3 gtable_0.3.4 spatstat.data_3.1-4
## [64] tidyr_1.3.0 data.table_1.16.0 utf8_1.2.4
## [67] spatstat.geom_3.3-5 RcppAnnoy_0.0.21 ggrepel_0.9.4
## [70] RANN_2.6.1 pillar_1.9.0 stringr_1.5.1
## [73] spam_2.10-0 RcppHNSW_0.5.0 limma_3.58.1
## [76] later_1.3.2 splines_4.3.2 lattice_0.21-9
## [79] survival_3.5-7 deldir_2.0-2 tidyselect_1.2.0
## [82] miniUI_0.1.1.1 pbapply_1.7-2 knitr_1.45
## [85] gridExtra_2.3 scattermore_1.2 RhpcBLASctl_0.23-42
## [88] xfun_0.41 statmod_1.5.0 matrixStats_1.2.0
## [91] stringi_1.8.3 lazyeval_0.2.2 yaml_2.3.8
## [94] evaluate_0.23 codetools_0.2-19 tibble_3.2.1
## [97] cli_3.6.2 uwot_0.1.16 xtable_1.8-4
## [100] reticulate_1.35.0 munsell_0.5.0 jquerylib_0.1.4
## [103] globals_0.16.2 spatstat.random_3.3-2 png_0.1-8
## [106] spatstat.univar_3.1-2 parallel_4.3.2 ellipsis_0.3.2
## [109] presto_1.0.0 dotCall64_1.1-1 listenv_0.9.0
## [112] viridisLite_0.4.2 scales_1.3.0 ggridges_0.5.5
## [115] purrr_1.0.2 rlang_1.1.2 cowplot_1.1.2